from transformers import AutoTokenizer, AutoModel, pipeline
from transformers.models.marian.modeling_marian import MarianMTModel,MarianModel

# Local directory holding the opus-mt-en-zh checkpoint (machine-specific path).
pretrain_model_dir = '/media/dengyunfei/6T/data/models/huggingface/opus-mt-en-zh'

# Settings shared by both pipelines built below.
_task = "translation_en_to_zh"
# Device index handed to pipeline().
# NOTE(review): presumably selects the second GPU -- confirm for the target machine.
_device = 1
_sentence = "I am a happy little girl"

# First pipeline: pass only the checkpoint directory and let pipeline() pick
# the model class on its own, then report which class it chose.
pipe = pipeline(_task, model=pretrain_model_dir, device=_device)
_pipe_model_cls = pipe.model.__class__
print('The pipeline recognized pretrain model is {}'.format(_pipe_model_cls))

tokenizer = AutoTokenizer.from_pretrained(pretrain_model_dir)

# Load the same checkpoint through AutoModel, only to report the class it
# resolves to.
auto_model = AutoModel.from_pretrained(pretrain_model_dir)
print('The automatic recognized pretrain model is {}'.format(auto_model.__class__))
# Author's observation: the class AutoModel resolves to here is MarianModel,
# and using that class with the translation pipeline fails with
#   AttributeError: 'TranslationPipeline' object has no attribute 'prefix'

# Open question from the author: why must MarianMTModel be named explicitly
# instead of relying on AutoModel's automatic detection?
# NOTE(review): likely because AutoModel maps to the base model without a
# language-modeling head, whereas translation needs the generation-capable
# class; AutoModelForSeq2SeqLM is presumably the auto-class equivalent --
# confirm against the transformers docs.
model = MarianMTModel.from_pretrained(pretrain_model_dir)
print('The manual assign pretrain model is {}'.format(model.__class__))

# Second pipeline: same task, but with the explicitly loaded model and
# tokenizer supplied by hand.
pipe1 = pipeline(_task, model=model, tokenizer=tokenizer, device=_device)

# Translate the same sentence with both pipelines, in the original order, so
# the two outputs can be compared.
for _translator in (pipe, pipe1):
    print(_translator(_sentence))

# Inspect the pipeline's default config
